Correlation between average question ratings and LIWC scores

Correlation between Q3 and LIWC scores

## Warning: The `scale_name` argument of `discrete_scale()` is deprecated as of ggplot2
## 3.5.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Pre vs. Post-COVID

# Prepare the model variables. Year is coerced to numeric *before* the NA
# filter so that values which fail to parse (and become NA during coercion)
# are dropped as well, instead of slipping past the filter.
# `interv` is 1 for Year >= 2020 and 0 for Year < 2020.
collapsed_SETs <- collapsed_SETs %>%
  mutate(Year = as.numeric(Year)) %>%
  filter(!is.na(Year)) %>%
  mutate(interv = case_when(Year >= 2020 ~ 1, Year < 2020 ~ 0))

# Linear mixed model of Mean1 on Year, the pre/post-2020 indicator and their
# interaction, with a random intercept for each Course.
lmm1 <- lmerTest::lmer(
  Mean1 ~ Year * interv + (1 | Course),
  data = collapsed_SETs
)
summary(lmm1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Mean1 ~ Year * interv + (1 | Course)
##    Data: collapsed_SETs
## 
## REML criterion at convergence: 91.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9412 -0.5050  0.1089  0.5176  1.9017 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Course   (Intercept) 0.2439   0.4939  
##  Residual             0.2444   0.4944  
## Number of obs: 48, groups:  Course, 11
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)  183.05016  109.96075   39.78622   1.665    0.104
## Year          -0.08912    0.05452   39.78814  -1.635    0.110
## interv      -370.30678  254.76972   36.01559  -1.453    0.155
## Year:interv    0.18332    0.12608   36.01599   1.454    0.155
## 
## Correlation of Fixed Effects:
##             (Intr) Year   interv
## Year        -1.000              
## interv      -0.414  0.414       
## Year:interv  0.414 -0.414 -1.000
library(AICcmodavg)
# Predicted values (`fit`) and their standard errors (`se.fit`) from lmm1 for
# every row of collapsed_SETs. predictSE() is evaluated once and both
# components are taken from the same result.
marg_fit <- predictSE(lmm1, collapsed_SETs)
margpred <- marg_fit$fit
margse <- marg_fit$se.fit

# Two-sided 95% interval around each prediction using the normal quantile.
z_crit <- qnorm(1 - 0.05 / 2)
marglcl <- margpred - z_crit * margse
margucl <- margpred + z_crit * margse
margpctpred <- margpred
margpctlcl <- marglcl
margpctucl <- margucl

margpreds <- as.data.frame(
  cbind(
    collapsed_SETs,
    margpctpred = margpctpred,
    margpctlcl = margpctlcl,
    margpctucl = margpctucl
  )
)

# Attach the prediction and interval bounds to the plotting data by Year.
# match() returns the first margpreds row for each Year; the index is the same
# for all three columns, so it is computed once.
figdata <- collapsed_SETs
year_idx <- match(figdata$Year, margpreds$Year)
figdata$margpctpred <- margpreds[year_idx, "margpctpred"]
figdata$margpctlcl <- margpreds[year_idx, "margpctlcl"]
figdata$margpctucl <- margpreds[year_idx, "margpctucl"]

# Observed Mean1 values (semi-transparent points), model predictions (solid
# black points) and the interval (lines plus shaded ribbon), with vertical
# reference lines at 2020 and 2021. The x axis is limited to 2012-2024.
ggplot(data = figdata, aes(y = Mean1, x = Year)) +
  geom_point(alpha = 0.5) +
  geom_point(aes(y = margpctpred, x = Year), color = "black") +
  geom_line(aes(y = margpctlcl, x = Year), alpha = 0.5) +
  geom_line(aes(y = margpctucl, x = Year), alpha = 0.5) +
  geom_ribbon(aes(ymin = margpctlcl, ymax = margpctucl), alpha = 0.2) +
  xlim(2012, 2024) +
  labs(y = "Mean1", x = "Year") +
  theme_bw() +
  geom_vline(xintercept = 2020, color = "red") +
  geom_vline(xintercept = 2021, linetype = "dashed", color = "forestgreen")

Required vs. not-required courses

# Read the first LIWC-22 results file.
SETs1 <- read_csv(file = "LIWC-22 Results - SETs - LIWC Analysis.csv")
## Rows: 234 Columns: 160
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (33): Course, Campus, Term, Section, Invited, Responded, Median1, Media...
## dbl (127): Mean1, Mean2, Mean3, Mean4, Mean5, Mean6, Q9 mean, Q10 mean, Q11 ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the second LIWC-22 results file and drop its Emoji column.
SETs2 <- select(
  read_csv(file = "LIWC-22 Results - data_latest_2024 - LIWC Analysis.csv"),
  -Emoji
)
## Rows: 20 Columns: 161
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (23): Course, Campus, Term, Section, Mean3, Mean5, Q9 mean, Q10 mean, M...
## dbl (134): Invited, Responded, Mean1, Mean2, Mean4, Mean6, Q11 mean, Median1...
## num   (4): Mode1, Mode2, Mode4, Mode6
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the two result sets into one table and discard the per-file objects.
SETs <- rbind(SETs1, SETs2)
rm(SETs1, SETs2)

# Drop the columns that are not needed (each listed once) and label every
# course: IMI202H5 and JRE420H1 are "Not Required", everything else is
# "Required".
SETs <- SETs %>%
  select(-c(Segment, WC, WPS, A2, C2, D2, D4, E1, E2, E3, Dic, BigWords)) %>%
  mutate(
    `Course Type` = if_else(
      Course %in% c("IMI202H5", "JRE420H1"),
      "Not Required",
      "Required"
    )
  )

#' Plot the yearly mean of one column, with one line per course type.
#'
#' @param dt Data frame containing `Term`, `Course Type` and the column named
#'   by `var_name`. `Term` is split on "_" into a term part and a year part.
#' @param var_name Name of the column to average, given as a string.
#' @param yaxislabel Label for the y axis.
#' @param na.rm Should missing values be ignored when averaging? Defaults to
#'   `TRUE` so that a single missing rating does not turn the whole
#'   course-type/year mean into `NA` and drop it from the plot. Pass `FALSE`
#'   to propagate missing values instead.
#' @return A ggplot object: Year on the x axis, the group mean on the y axis,
#'   a coloured line per course type and black points.
means_plot_year <- function(dt, var_name, yaxislabel, na.rm = TRUE) {
  dt %>%
    filter(!is.na(Term)) %>%
    # Terms without a "_" get Year = NA and are removed by the next filter;
    # fill = "right" requests that NA fill explicitly instead of warning on
    # every call.
    separate(Term, into = c("Term", "Year"), sep = "_", fill = "right") %>%
    filter(!is.na(Year)) %>%
    group_by(`Course Type`, Year) %>%
    summarize(
      mn = mean(.data[[var_name]], na.rm = na.rm),
      .groups = "drop"
    ) %>%
    ggplot(aes(x = Year, y = mn, group = `Course Type`)) +
    geom_line(aes(color = `Course Type`)) +
    geom_point(color = "black") +
    labs(y = yaxislabel, color = "Course Type")
}

# Convert the question-mean columns to numeric; entries that cannot be parsed
# as numbers become NA (hence the coercion warnings below).
SETs[["Q9 mean"]] <- as.numeric(SETs[["Q9 mean"]])
## Warning: NAs introduced by coercion
SETs[["Q10 mean"]] <- as.numeric(SETs[["Q10 mean"]])
## Warning: NAs introduced by coercion
SETs[["Mean5"]] <- as.numeric(SETs[["Mean5"]])
## Warning: NAs introduced by coercion
SETs[["Mean3"]] <- as.numeric(SETs[["Mean3"]])
## Warning: NAs introduced by coercion
# Copy the Q9-Q11 means into Mean9-Mean11 so they follow the same "Mean<i>"
# naming as the other question columns.
SETs[["Mean9"]] <- SETs[["Q9 mean"]]
SETs[["Mean10"]] <- SETs[["Q10 mean"]]
SETs[["Mean11"]] <- SETs[["Q11 mean"]]
# Draw the yearly-mean plot for each of the columns Mean1 to Mean6.
for (i in 1:6) {
  mean_column <- sprintf("Mean%d", i)
  axis_label <- sprintf("Mean %d", i)
  print(means_plot_year(SETs, mean_column, axis_label))
}
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.

## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.

# Draw the yearly-mean plot for each of the columns Mean9 to Mean11.
for (i in 9:11) {
  mean_column <- sprintf("Mean%d", i)
  axis_label <- sprintf("Mean %d", i)
  print(means_plot_year(SETs, mean_column, axis_label))
}
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 4 rows [57, 58,
## 65, 66].
## `summarise()` has grouped output by 'Course Type'. You can override using the
## `.groups` argument.